1 ) ☘️ Introduction to HCC and TACE-based treatment.

1.1 Project Overview

💡 Research Context

This script analyzes targeted sequencing data from patients with hepatocellular carcinoma (HCC) who have been treated with transarterial chemoembolization (TACE) as part of my PhD thesis research project(Chapter 1️⃣).

1.2 HCC and TACE Background

👨🏻‍⚕️ Clinical Background

Hepatocellular carcinoma (HCC) is one of the most common malignancies worldwide, with increasing incidence in many countries. Transarterial chemoembolization (TACE) is a standard treatment for intermediate-stage HCC patients according to the Barcelona Clinic Liver Cancer (BCLC) staging system.

1.3 Research Objectives

🎯 Study Aims

This analysis aims to investigate the genetic mutational landscape of HCC patients treated with TACE and to identify potential biomarkers for treatment response and survival outcomes.

1.4 Analysis Methods

🍏 Analytical Approach HCC_TACE

We use maftools for mutation analysis and survival analysis techniques to evaluate associations between mutated genes and clinical outcomes including overall survival (OS) and progression-free survival (PFS).

2 ) 🍀 Data Preparation.

2.1 Load Required Packages

# Set CRAN mirror to avoid errors
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Function to safely install and load packages
install_and_load <- function(packages, bioconductor = FALSE) {
  for (package in packages) {
    # Check if package is installed
    if (!requireNamespace(package, quietly = TRUE)) {
      message(paste("Installing package:", package))
      
      # Install from appropriate source
      if (bioconductor) {
        if (!requireNamespace("BiocManager", quietly = TRUE)) {
          install.packages("BiocManager")
        }
        BiocManager::install(package, update = FALSE, ask = FALSE)
      } else {
        install.packages(package, repos = "https://cloud.r-project.org")
      }
    }
    
    # Load package
    suppressWarnings(suppressPackageStartupMessages(
      library(package, character.only = TRUE)
    ))
  }
}

# Install and load remotes package first
install_and_load("remotes")

# Regular CRAN packages
cran_packages <- c("survival", "lubridate", "ggfortify", "flexsurv", "remotes", 
                   "evaluate", "knitr", "dplyr", "ggplot2", "magrittr", "grid",
                   "broom.helpers", "data.table", "plotly", "DT", "flexdashboard",
                   "htmlwidgets", "visNetwork", "kableExtra", "RColorBrewer", "DT")

# Bioconductor packages
bioc_packages <- c("ggsurvfit", "gtsummary", "tidycmprsk", "ranger", 
                   "survminer", "survcomp", "maftools")

# Install and load packages
install_and_load(cran_packages)
install_and_load(bioc_packages, bioconductor = TRUE)

# Create bibliography directory if it doesn't exist
if (!dir.exists("bib")) {
  dir.create("bib")
}

# Generate package bibliography
all_packages <- c(.packages(), 'rmarkdown', 'knitr')
knitr::write_bib(x = all_packages, file = 'bib/packages.bib')

# Install maftools from GitHub if needed
if (!requireNamespace("maftools", quietly = TRUE)) {
  tryCatch({
    remotes::install_github("PoisonAlien/maftools", quiet = TRUE)
    library(maftools)
  }, error = function(e) {
    message("Couldn't install maftools from GitHub, trying Bioconductor version instead")
  })
}

2.2 Data Import and Processing

# Define file paths
maf_file_path <- "/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Tables/HCC_TACE_maftools.maf"
clinical_data_path <- "/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Tables/ClinicData_Maf_Final.tsv"

# Check if files exist
maf_exists <- file.exists(maf_file_path)
clinical_exists <- file.exists(clinical_data_path)

if (!maf_exists || !clinical_exists) {
  warning("One or more files not found. Please check the paths.")
  if (!maf_exists) warning(paste("MAF file not found:", maf_file_path))
  if (!clinical_exists) warning(paste("Clinical data file not found:", clinical_data_path))
} else {
  # Read MAF file with all mutation types
  HCC_Final <- read.maf(
    maf = maf_file_path,
    clinicalData = clinical_data_path,
    rmFlags = FALSE,
    useAll = TRUE,
    verbose = TRUE,
    isTCGA = FALSE,
    vc_nonSyn = c(
      "Missense_Mutation", "5'UTR", "3'UTR", "5'Flank", "In_Frame_Del",
      "Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", "Nonsense_Mutation", 
      "Intron", "Silent"
    )
  )
  
  # Read MAF file without intron/silent mutations
  HCC_Final_non_intron <- read.maf(
    maf = maf_file_path,
    clinicalData = clinical_data_path,
    rmFlags = FALSE,
    useAll = TRUE,
    verbose = TRUE,
    isTCGA = FALSE,
    vc_nonSyn = c(
      "Missense_Mutation", "5'UTR", "3'UTR", "5'Flank", "In_Frame_Del",
      "Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", "Nonsense_Mutation"
    )
  )
  
  # Read MAF file with default non-synonymous definitions
  HCC_Final_non_synonymous <- read.maf(
    maf = maf_file_path,
    clinicalData = clinical_data_path,
    rmFlags = FALSE,
    useAll = TRUE,
    verbose = TRUE,
    isTCGA = FALSE,
    vc_nonSyn = NULL
  )
  
  # Load clinical data
  ClinicalData <- read.delim(clinical_data_path)
  ClinicalData <- as.data.table(ClinicalData)
  
  # Associate clinical data with MAF object
  HCC_Final@clinical.data <- ClinicalData
  HCC_Final_non_intron@clinical.data <- ClinicalData
  HCC_Final_non_synonymous@clinical.data <- ClinicalData
  
  # Print summary of loaded data
  cat("Data successfully imported:\n")
  cat(paste0("- Total samples: ", length(unique(HCC_Final@clinical.data$Tumor_Sample_Barcode)), "\n"))
  cat(paste0("- Total patients: ", length(unique(HCC_Final@clinical.data$Pateint)), "\n"))
  cat(paste0("- Total mutations (all types): ", nrow(HCC_Final@data), "\n"))
  cat(paste0("- Total non-intron mutations: ", nrow(HCC_Final_non_intron@data), "\n"))
  cat(paste0("- Total non-synonymous mutations: ", nrow(HCC_Final_non_synonymous@data), "\n"))
  
  # List of HCC genes of interest
  HCC_genes <- c(
    "APC", "ARID1A", "ARID2", "ATM", "AXIN1", "CDKN2A", "CTNNB1", "HNF1A", 
    "JAK1", "KEAP1", "LZTR1", "PIK3CA", "PTEN", "RB1", "SF3B1", 
    "SMARCA4", "TERT", "TP53"
  )
if (!require("remotes")) install.packages("remotes")
remotes::install_github("uham-bio/UHHformats", build_vignettes = TRUE)
  # Define color palette for mutation types
  fabcolors <- RColorBrewer::brewer.pal(n = 12, name = 'Set3')
  names(fabcolors) <- c(
    "Missense_Mutation", "5'UTR", "3'UTR", "5'Flank", "In_Frame_Del",
    "Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", "Intron", 
    "Nonsense_Mutation", "Silent"
  )
  fabcolors <- list(Variant_Classification = fabcolors)
}
## -Reading
## -Validating
## -Silent variants: 9 
## -Summarizing
## -Processing clinical data
## -Finished in 0.078s elapsed (0.064s cpu) 
## -Reading
## -Validating
## -Silent variants: 155 
## -Summarizing
## -Processing clinical data
## -Finished in 0.051s elapsed (0.047s cpu) 
## -Reading
## -Validating
## -Silent variants: 245 
## -Summarizing
## -Processing clinical data
## -Finished in 0.058s elapsed (0.055s cpu) 
## Data successfully imported:
## - Total samples: 60
## - Total patients: 30
## - Total mutations (all types): 299
## - Total non-intron mutations: 153
## - Total non-synonymous mutations: 63
Research Context (Click to expand)

This code imports and processes mutation data from MAF (Mutation Annotation Format) files for hepatocellular carcinoma (HCC) with these enhancements:

Three Different MAF Objects

The code creates three separate objects with different filtering criteria:

  1. HCC_Final: Includes ALL mutation types (including silent mutations and introns)
  2. HCC_Final_non_intron: Excludes intron and silent mutations
  3. HCC_Final_non_synonymous: Uses default non-synonymous definitions (most strict 😤)

Clinical Data Processing

  • Associates clinical data with all three MAF objects
  • Creates a dynamic summary display showing sample and mutation counts

2.3 SubSet Chorots

#==============================================================================
# FUNCTION DEFINITIONS
#==============================================================================
#' Create MAF subsets based on clinical queries
#' 
#' @param maf MAF object to subset
#' @param subset_definitions Named list of query expressions
#' @return None (assigns objects to global environment)
create_named_subsets <- function(maf, subset_definitions) {
  for (name in names(subset_definitions)) {
    assign(name, subsetMaf(maf = maf, clinQuery = subset_definitions[[name]]), envir = .GlobalEnv)
  }
}

#' Calculate comprehensive statistics for MAF subsets
#' 
#' @param maf_objects List of MAF objects
#' @param reference_maf Reference MAF for percentage calculations
#' @param category_map Mapping of subset names to categories
#' @return Data frame with statistics for each subset
calculate_maf_stats <- function(maf_objects, reference_maf = NULL, category_map = NULL) {
  # Initialize an empty list to store individual data frames
  stats_list <- list()
  
  # Get reference statistics if available
  ref_samples <- NA
  ref_mutations <- NA
  if(!is.null(reference_maf) && inherits(reference_maf, "MAF")) {
    ref_samples <- length(unique(reference_maf@clinical.data$Tumor_Sample_Barcode))
    ref_mutations <- nrow(reference_maf@data)
  }
  
  # Process each subset
  for(name in names(maf_objects)) {
    tryCatch({
      maf_obj <- maf_objects[[name]]
      
      # Get category if provided
      category <- "Other"
      if(!is.null(category_map) && name %in% names(category_map)) {
        category <- category_map[[name]]
      }
      
      # Calculate basic statistics
      samples <- unique(maf_obj@clinical.data$Tumor_Sample_Barcode)
      n_samples <- length(samples)
      n_mutations <- nrow(maf_obj@data)
      
      # Skip if no data
      if(n_samples == 0 || n_mutations == 0) {
        stats_list[[name]] <- tibble(
          Category = category,
          Subset = name,
          Samples = n_samples,
          Mutations = n_mutations,
          MedianMutsPerSample = 0,
          TopGene = "None",
          TopGeneMutationCount = 0,
          SamplePct = ifelse(!is.na(ref_samples), round(n_samples/ref_samples*100, 1), NA),
          MutationPct = ifelse(!is.na(ref_mutations), round(n_mutations/ref_mutations*100, 1), NA)
        )
        next
      }
      
      # Calculate mutation metrics
      muts_per_sample <- table(maf_obj@data$Tumor_Sample_Barcode)
      median_muts <- median(as.numeric(muts_per_sample))
      
      # Find top gene
      gene_counts <- sort(table(maf_obj@data$Hugo_Symbol), decreasing = TRUE)
      top_gene <- names(gene_counts)[1]
      top_gene_count <- gene_counts[1]
      
      # Calculate percentages
      sample_pct <- ifelse(!is.na(ref_samples), round(n_samples/ref_samples*100, 1), NA)
      mut_pct <- ifelse(!is.na(ref_mutations), round(n_mutations/ref_mutations*100, 1), NA)
      
      # Create tibble for this subset (tibbles don't have row names)
      stats_list[[name]] <- tibble(
        Category = category,
        Subset = name,
        Samples = n_samples,
        Mutations = n_mutations,
        MedianMutsPerSample = median_muts,
        TopGene = top_gene,
        TopGeneMutationCount = top_gene_count,
        SamplePct = sample_pct,
        MutationPct = mut_pct
      )
    }, error = function(e) {
      warning(paste("Error processing", name, ":", e$message))
    })
  }
  
  # Combine using bind_rows from dplyr to avoid row names issues
  if(length(stats_list) > 0) {
    stats_df <- bind_rows(stats_list)
  } else {
    stats_df <- tibble(
      Category = character(),
      Subset = character(),
      Samples = integer(),
      Mutations = integer(),
      MedianMutsPerSample = numeric(),
      TopGene = character(),
      TopGeneMutationCount = integer(),
      SamplePct = numeric(),
      MutationPct = numeric()
    )
  }
  
  return(stats_df)
}

#==============================================================================
# SUBSET DEFINITIONS
#==============================================================================
# Define cohort categories
subset_categories <- list(
  "Treatment" = c("pre_tace_samples", "post_tace_samples", "progression_samples"),
  "Etiology" = c("MASLD_samples", "HCV_samples", "HBV_samples", "Alcohol_samples", 
                 "ALLsample_Viralsamples", "ALLsample_NoViral", "Alcohol_sample_MASLD_samples",
                 "ALLsample_No_MASLD", "ALLsample_No_Alcohol", "ALLsample_No_HBC"),
  "BCLC" = c("BCLC_A", "BCLC_B", "BCLC_C", "ALLBCLC_NoC", "ALLBCLC_NoA"),
  "Survival" = c("Patient_OS_survival", "Patient_PFS_survival")
)

# Define subset query expressions
subset_definitions <- list(
  # Treatment condition subsets
  pre_tace_samples = "Condition1 == 'Pre_TACE'",
  post_tace_samples = "Condition1 == 'Post_TACE'",
  progression_samples = "Condition1 == 'progression'",
  
  # Disease etiology subsets
  MASLD_samples = "Aetiology == 'MASLD'",
  HCV_samples = "Aetiology == 'HCV'",
  HBV_samples = "Aetiology == 'HBV'",
  Alcohol_samples = "Aetiology == 'Alcohol'",
  ALLsample_Viralsamples = "Aetiology %in% c('HCV','HBV')",
  ALLsample_NoViral = "Aetiology %in% c('Alcohol','MASLD','Other')",
  Alcohol_sample_MASLD_samples = "Aetiology %in% c('Alcohol','MASLD')",
  ALLsample_No_MASLD = "Aetiology %in% c('Alcohol','HCV','HBV','Other')",
  ALLsample_No_Alcohol = "Aetiology %in% c('MASLD','HCV','HBV','Other')",
  ALLsample_No_HBC = "Aetiology %in% c('MASLD','HCV','Alcohol','Other')",
  
  # BCLC stage subsets
  BCLC_A = "BCLC %in% c('A')",
  BCLC_B = "BCLC %in% c('B')",
  BCLC_C = "BCLC %in% c('C')",
  ALLBCLC_NoC = "BCLC %in% c('A','B')",
  ALLBCLC_NoA = "BCLC %in% c('B','C')",
  
  # Survival status subsets
  Patient_OS_survival = "OS_Status == '1'",
  Patient_PFS_survival = "PFS_Status == '1'"
)

#==============================================================================
# CREATE COHORT SUBSETS
#==============================================================================
# Create all subsets at once
cat("### Creating MAF subsets\n")
## ### Creating MAF subsets
create_named_subsets(HCC_Final, subset_definitions)
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
## -Processing clinical data
cat("✓ Created", length(subset_definitions), "MAF subsets\n\n")
## ✓ Created 20 MAF subsets
# Create category mapping
category_map <- list()
for(cat in names(subset_categories)) {
  for(subset in subset_categories[[cat]]) {
    category_map[[subset]] <- cat
  }
}

# Get all MAF objects
maf_objects <- list()
for(subset_name in unlist(subset_categories)) {
  if(exists(subset_name) && inherits(get(subset_name), "MAF")) {
    maf_objects[[subset_name]] <- get(subset_name)
  }
}

#==============================================================================
# CALCULATE STATISTICS
#==============================================================================
cat("### Calculating statistics\n")
## ### Calculating statistics
subset_stats <- calculate_maf_stats(maf_objects, reference_maf = HCC_Final, category_map = category_map)
# Ensure it's a tibble to avoid row name issues
subset_stats <- as_tibble(subset_stats)
cat("✓ Calculated statistics for", nrow(subset_stats), "subsets\n\n")
## ✓ Calculated statistics for 20 subsets
#==============================================================================
# GENERATE SUMMARY REPORT
#==============================================================================
# Overall dataset summary
cat("# Hepatocellular Carcinoma Cohort Statistics\n\n")
## # Hepatocellular Carcinoma Cohort Statistics
# Reference dataset information
ref_samples <- length(unique(HCC_Final@clinical.data$Tumor_Sample_Barcode))
ref_mutations <- nrow(HCC_Final@data)
ref_median_muts <- median(table(HCC_Final@data$Tumor_Sample_Barcode))

cat("<div style='background-color: #f8f9fa; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>")
## <div style='background-color: #f8f9fa; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>
cat("<strong>Reference Dataset Summary:</strong><br>")
## <strong>Reference Dataset Summary:</strong><br>
cat(ref_samples, "samples with", ref_mutations, "mutations<br>")
## 60 samples with 299 mutations<br>
cat("Median mutations per sample:", ref_median_muts, "<br>")
## Median mutations per sample: 3.5 <br>
cat("</div>\n\n")
## </div>
#==============================================================================
# CATEGORY-SPECIFIC STATISTICS
#==============================================================================
# Create tables and visualizations by category
for(cat in unique(subset_stats$Category)) {
  cat(paste0("## ", cat, " Cohorts\n\n"))
  
  # Filter statistics for this category
  cat_stats_filtered <- subset_stats %>% 
    filter(Category == cat) %>%
    arrange(desc(Samples))
  
  # Create a completely fresh tibble without any possibility of row names
  cat_stats <- tibble(
    Subset = cat_stats_filtered$Subset,
    Samples = cat_stats_filtered$Samples,
    Mutations = cat_stats_filtered$Mutations,
    MedianMutsPerSample = cat_stats_filtered$MedianMutsPerSample,
    TopGene = cat_stats_filtered$TopGene,
    TopGeneMutationCount = cat_stats_filtered$TopGeneMutationCount,
    SamplePct = cat_stats_filtered$SamplePct,
    MutationPct = cat_stats_filtered$MutationPct
  )
  
  cat("### Summary Table\n")
  # Add row.names=FALSE parameter here
  knitr::kable(cat_stats, 
               caption = paste(cat, "Cohort Statistics"),
               format = "html",
               row.names = FALSE) %>%  # Explicitly set row.names=FALSE
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                 full_width = FALSE) %>%
    column_spec(1, bold = TRUE) %>%
    print()
  
  cat("\n### Visualizations\n")
  # Sample count visualization
  p1 <- ggplot(cat_stats, aes(x = reorder(Subset, -Samples), y = Samples, fill = Subset)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = Samples), vjust = -0.5, size = 3.5) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "none") +
    labs(title = paste("Sample Counts for", cat, "Cohorts"),
         x = NULL, y = "Number of Samples") +
    scale_fill_brewer(palette = "Set2")
  
  # Mutation rate visualization  
  p2 <- ggplot(cat_stats, aes(x = reorder(Subset, -MedianMutsPerSample), 
                            y = MedianMutsPerSample, fill = Subset)) +
    geom_bar(stat = "identity") +
    geom_text(aes(label = MedianMutsPerSample), vjust = -0.5, size = 3.5) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "none") +
    labs(title = paste("Mutation Rate for", cat, "Cohorts"),
         x = NULL, y = "Median Mutations per Sample") +
    scale_fill_brewer(palette = "Set2")
  
  # Display plots
  print(p1)
  print(p2)
  
  cat("\n\n")
}
## ## Treatment Cohorts
## 
## ### Summary Table
## <table class="table table-striped table-hover table-condensed" style="color: black; width: auto !important; margin-left: auto; margin-right: auto;">
## <caption>Treatment Cohort Statistics</caption>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> Subset </th>
##    <th style="text-align:right;"> Samples </th>
##    <th style="text-align:right;"> Mutations </th>
##    <th style="text-align:right;"> MedianMutsPerSample </th>
##    <th style="text-align:left;"> TopGene </th>
##    <th style="text-align:right;"> TopGeneMutationCount </th>
##    <th style="text-align:right;"> SamplePct </th>
##    <th style="text-align:right;"> MutationPct </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> post_tace_samples </td>
##    <td style="text-align:right;"> 26 </td>
##    <td style="text-align:right;"> 128 </td>
##    <td style="text-align:right;"> 4 </td>
##    <td style="text-align:left;"> ARID1A </td>
##    <td style="text-align:right;"> 14 </td>
##    <td style="text-align:right;"> 43.3 </td>
##    <td style="text-align:right;"> 42.8 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> pre_tace_samples </td>
##    <td style="text-align:right;"> 23 </td>
##    <td style="text-align:right;"> 110 </td>
##    <td style="text-align:right;"> 3 </td>
##    <td style="text-align:left;"> ARID1A </td>
##    <td style="text-align:right;"> 15 </td>
##    <td style="text-align:right;"> 38.3 </td>
##    <td style="text-align:right;"> 36.8 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> progression_samples </td>
##    <td style="text-align:right;"> 11 </td>
##    <td style="text-align:right;"> 61 </td>
##    <td style="text-align:right;"> 3 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 16 </td>
##    <td style="text-align:right;"> 18.3 </td>
##    <td style="text-align:right;"> 20.4 </td>
##   </tr>
## </tbody>
## </table>
## ### Visualizations

## 
## 
## ## Etiology Cohorts
## 
## ### Summary Table
## <table class="table table-striped table-hover table-condensed" style="color: black; width: auto !important; margin-left: auto; margin-right: auto;">
## <caption>Etiology Cohort Statistics</caption>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> Subset </th>
##    <th style="text-align:right;"> Samples </th>
##    <th style="text-align:right;"> Mutations </th>
##    <th style="text-align:right;"> MedianMutsPerSample </th>
##    <th style="text-align:left;"> TopGene </th>
##    <th style="text-align:right;"> TopGeneMutationCount </th>
##    <th style="text-align:right;"> SamplePct </th>
##    <th style="text-align:right;"> MutationPct </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> ALLsample_No_HBC </td>
##    <td style="text-align:right;"> 58 </td>
##    <td style="text-align:right;"> 284 </td>
##    <td style="text-align:right;"> 3.5 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 34 </td>
##    <td style="text-align:right;"> 96.7 </td>
##    <td style="text-align:right;"> 95.0 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> ALLsample_No_Alcohol </td>
##    <td style="text-align:right;"> 44 </td>
##    <td style="text-align:right;"> 198 </td>
##    <td style="text-align:right;"> 3.0 </td>
##    <td style="text-align:left;"> ARID1A </td>
##    <td style="text-align:right;"> 25 </td>
##    <td style="text-align:right;"> 73.3 </td>
##    <td style="text-align:right;"> 66.2 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> ALLsample_No_MASLD </td>
##    <td style="text-align:right;"> 42 </td>
##    <td style="text-align:right;"> 225 </td>
##    <td style="text-align:right;"> 3.5 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 31 </td>
##    <td style="text-align:right;"> 70.0 </td>
##    <td style="text-align:right;"> 75.3 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> ALLsample_NoViral </td>
##    <td style="text-align:right;"> 38 </td>
##    <td style="text-align:right;"> 197 </td>
##    <td style="text-align:right;"> 4.0 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 31 </td>
##    <td style="text-align:right;"> 63.3 </td>
##    <td style="text-align:right;"> 65.9 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> Alcohol_sample_MASLD_samples </td>
##    <td style="text-align:right;"> 34 </td>
##    <td style="text-align:right;"> 175 </td>
##    <td style="text-align:right;"> 4.5 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 30 </td>
##    <td style="text-align:right;"> 56.7 </td>
##    <td style="text-align:right;"> 58.5 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> ALLsample_Viralsamples </td>
##    <td style="text-align:right;"> 22 </td>
##    <td style="text-align:right;"> 102 </td>
##    <td style="text-align:right;"> 3.0 </td>
##    <td style="text-align:left;"> ARID1A </td>
##    <td style="text-align:right;"> 11 </td>
##    <td style="text-align:right;"> 36.7 </td>
##    <td style="text-align:right;"> 34.1 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> HCV_samples </td>
##    <td style="text-align:right;"> 20 </td>
##    <td style="text-align:right;"> 87 </td>
##    <td style="text-align:right;"> 3.0 </td>
##    <td style="text-align:left;"> ARID1A </td>
##    <td style="text-align:right;"> 11 </td>
##    <td style="text-align:right;"> 33.3 </td>
##    <td style="text-align:right;"> 29.1 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> MASLD_samples </td>
##    <td style="text-align:right;"> 18 </td>
##    <td style="text-align:right;"> 74 </td>
##    <td style="text-align:right;"> 3.5 </td>
##    <td style="text-align:left;"> ARID1A </td>
##    <td style="text-align:right;"> 13 </td>
##    <td style="text-align:right;"> 30.0 </td>
##    <td style="text-align:right;"> 24.7 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> Alcohol_samples </td>
##    <td style="text-align:right;"> 16 </td>
##    <td style="text-align:right;"> 101 </td>
##    <td style="text-align:right;"> 5.0 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 25 </td>
##    <td style="text-align:right;"> 26.7 </td>
##    <td style="text-align:right;"> 33.8 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> HBV_samples </td>
##    <td style="text-align:right;"> 2 </td>
##    <td style="text-align:right;"> 15 </td>
##    <td style="text-align:right;"> 7.5 </td>
##    <td style="text-align:left;"> AXIN1 </td>
##    <td style="text-align:right;"> 5 </td>
##    <td style="text-align:right;"> 3.3 </td>
##    <td style="text-align:right;"> 5.0 </td>
##   </tr>
## </tbody>
## </table>
## ### Visualizations

## 
## 
## ## BCLC Cohorts
## 
## ### Summary Table
## <table class="table table-striped table-hover table-condensed" style="color: black; width: auto !important; margin-left: auto; margin-right: auto;">
## <caption>BCLC Cohort Statistics</caption>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> Subset </th>
##    <th style="text-align:right;"> Samples </th>
##    <th style="text-align:right;"> Mutations </th>
##    <th style="text-align:right;"> MedianMutsPerSample </th>
##    <th style="text-align:left;"> TopGene </th>
##    <th style="text-align:right;"> TopGeneMutationCount </th>
##    <th style="text-align:right;"> SamplePct </th>
##    <th style="text-align:right;"> MutationPct </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> ALLBCLC_NoC </td>
##    <td style="text-align:right;"> 49 </td>
##    <td style="text-align:right;"> 218 </td>
##    <td style="text-align:right;"> 3 </td>
##    <td style="text-align:left;"> ARID1A </td>
##    <td style="text-align:right;"> 26 </td>
##    <td style="text-align:right;"> 81.7 </td>
##    <td style="text-align:right;"> 72.9 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> ALLBCLC_NoA </td>
##    <td style="text-align:right;"> 41 </td>
##    <td style="text-align:right;"> 186 </td>
##    <td style="text-align:right;"> 3 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 29 </td>
##    <td style="text-align:right;"> 68.3 </td>
##    <td style="text-align:right;"> 62.2 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> BCLC_B </td>
##    <td style="text-align:right;"> 30 </td>
##    <td style="text-align:right;"> 105 </td>
##    <td style="text-align:right;"> 3 </td>
##    <td style="text-align:left;"> ARID1A </td>
##    <td style="text-align:right;"> 14 </td>
##    <td style="text-align:right;"> 50.0 </td>
##    <td style="text-align:right;"> 35.1 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> BCLC_A </td>
##    <td style="text-align:right;"> 19 </td>
##    <td style="text-align:right;"> 113 </td>
##    <td style="text-align:right;"> 5 </td>
##    <td style="text-align:left;"> SF3B1 </td>
##    <td style="text-align:right;"> 15 </td>
##    <td style="text-align:right;"> 31.7 </td>
##    <td style="text-align:right;"> 37.8 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> BCLC_C </td>
##    <td style="text-align:right;"> 11 </td>
##    <td style="text-align:right;"> 81 </td>
##    <td style="text-align:right;"> 6 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 20 </td>
##    <td style="text-align:right;"> 18.3 </td>
##    <td style="text-align:right;"> 27.1 </td>
##   </tr>
## </tbody>
## </table>
## ### Visualizations

## 
## 
## ## Survival Cohorts
## 
## ### Summary Table
## <table class="table table-striped table-hover table-condensed" style="color: black; width: auto !important; margin-left: auto; margin-right: auto;">
## <caption>Survival Cohort Statistics</caption>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> Subset </th>
##    <th style="text-align:right;"> Samples </th>
##    <th style="text-align:right;"> Mutations </th>
##    <th style="text-align:right;"> MedianMutsPerSample </th>
##    <th style="text-align:left;"> TopGene </th>
##    <th style="text-align:right;"> TopGeneMutationCount </th>
##    <th style="text-align:right;"> SamplePct </th>
##    <th style="text-align:right;"> MutationPct </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> Patient_PFS_survival </td>
##    <td style="text-align:right;"> 41 </td>
##    <td style="text-align:right;"> 221 </td>
##    <td style="text-align:right;"> 4 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 31 </td>
##    <td style="text-align:right;"> 68.3 </td>
##    <td style="text-align:right;"> 73.9 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;font-weight: bold;"> Patient_OS_survival </td>
##    <td style="text-align:right;"> 37 </td>
##    <td style="text-align:right;"> 209 </td>
##    <td style="text-align:right;"> 5 </td>
##    <td style="text-align:left;"> CTNNB1 </td>
##    <td style="text-align:right;"> 31 </td>
##    <td style="text-align:right;"> 61.7 </td>
##    <td style="text-align:right;"> 69.9 </td>
##   </tr>
## </tbody>
## </table>
## ### Visualizations

#==============================================================================
# CROSS-CATEGORY COMPARISONS
#==============================================================================
cat("## Cross-Category Comparisons\n\n")
## ## Cross-Category Comparisons
# Sample size comparison across categories
# Use proper tibble-compatible approach for factor reordering
subset_stats_ordered <- subset_stats %>%
  mutate(Subset = factor(Subset, levels = Subset[order(Samples, decreasing = TRUE)]))

# Overall sample size comparison
cat("### Overall Sample Count Comparison\n")
## ### Overall Sample Count Comparison
overall_plot <- ggplot(subset_stats_ordered, aes(x = Subset, y = Samples, fill = Category)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Samples), vjust = -0.3, size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Cohort Sizes Across All Categories",
       x = NULL, y = "Number of Samples") +
  scale_fill_brewer(palette = "Set2")

print(overall_plot)

# Mutation rate comparison
cat("\n### Overall Mutation Rate Comparison\n")
## 
## ### Overall Mutation Rate Comparison
mutation_plot <- ggplot(subset_stats, aes(x = reorder(Subset, -MedianMutsPerSample), 
                                        y = MedianMutsPerSample, fill = Category)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = MedianMutsPerSample), vjust = -0.3, size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Mutation Rates Across All Categories",
       x = NULL, y = "Median Mutations per Sample") +
  scale_fill_brewer(palette = "Set2")

print(mutation_plot)

#==============================================================================
# INTERACTIVE DATA EXPLORATION
#==============================================================================
cat("\n## Interactive Data Table\n\n")
## 
## ## Interactive Data Table
if(requireNamespace("DT", quietly = TRUE)) {
  # Create a completely new tibble from scratch to avoid inheriting row names
  display_stats <- tibble(
    Category = subset_stats$Category,
    Subset = subset_stats$Subset,
    Samples = subset_stats$Samples,
    Mutations = subset_stats$Mutations,
    MedianMutsPerSample = subset_stats$MedianMutsPerSample,
    TopGene = subset_stats$TopGene,
    TopGeneMutationCount = subset_stats$TopGeneMutationCount,
    SamplePct = subset_stats$SamplePct,
    MutationPct = subset_stats$MutationPct
  )
  
  DT::datatable(display_stats,
                caption = "Comprehensive Cohort Statistics",
                filter = "top",
                options = list(
                  pageLength = 20,
                  dom = 'Blfrtip',
                  buttons = c('copy', 'csv'),
                  lengthMenu = list(c(10, 20, -1), c('10', '20', 'All'))
                ),
                rownames = FALSE) %>%
    DT::formatStyle(
      'Samples',
      background = DT::styleColorBar(c(0, max(display_stats$Samples)), 'steelblue'),
      backgroundSize = '98% 88%',
      backgroundRepeat = 'no-repeat',
      backgroundPosition = 'center'
    ) %>%
    DT::formatStyle(
      'Mutations',
      background = DT::styleColorBar(c(0, max(display_stats$Mutations)), 'forestgreen'),
      backgroundSize = '98% 88%',
      backgroundRepeat = 'no-repeat',
      backgroundPosition = 'center'
    ) %>%
    DT::formatRound(columns = c('MedianMutsPerSample', 'SamplePct', 'MutationPct'), digits = 1)
} else {
  cat("*Note: Install the DT package for interactive data tables*")
}

2.4 Dataset Overview

# Create a dashboard with key metrics
valueBox_html <- function(value, caption, icon, color) {
  paste0(
    '<div class="section-box" style="background-color:', 
    switch(color,
           "primary" = "#e8f0f9",
           "info" = "#e3f2fd",
           "success" = "#e8f8ef",
           "warning" = "#fff8e8",
           "#e8f0f9"), 
    '; padding: 15px; border-radius: 5px; margin: 10px; flex: 1; min-width: 250px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);">',
    '<div style="display:flex; align-items:center;">',
    '<div style="font-size:2.5em; margin-right:15px; color:', 
    switch(color,
           "primary" = "#3498db",
           "info" = "#0D47A1",
           "success" = "#27ae60",
           "warning" = "#f39c12",
           "#3498db"), 
    ';">',
    '<i class="fa ', icon, '"></i>',
    '</div>',
    '<div>',
    '<div style="font-size:1.8em; font-weight:bold;">', value, '</div>',
    '<div style="font-size:1.2em; color:#7f8c8d;">', caption, '</div>',
    '</div>',
    '</div>',
    '</div>'
  )
}

if (exists("HCC_Final") && exists("HCC_Final_non_intron") && exists("HCC_Final_non_synonymous")) {
  # Number of samples
  num_samples <- length(unique(HCC_Final@clinical.data$Tumor_Sample_Barcode))
  # Number of mutations
  num_mutations <- nrow(HCC_Final@data)
  # Median mutations per sample
  median_muts <- median(HCC_Final@variants.per.sample$Variants)
  # Number of samples non-intron
  num_samplesnon_intron <- length(unique(HCC_Final_non_intron@clinical.data$Tumor_Sample_Barcode))
  # Number of mutations non-intron
  num_mutationsnon_intron <- nrow(HCC_Final_non_intron@data)
  # Median mutations per sample non-intron
  median_mutsnon_intron <- median(HCC_Final_non_intron@variants.per.sample$Variants)
  # Number of samples non-synonymous
  num_samplesnon_synonymous <- length(unique(HCC_Final_non_synonymous@clinical.data$Tumor_Sample_Barcode))
  # Number of mutations non-synonymous
  num_mutationsnon_synonymous <- nrow(HCC_Final_non_synonymous@data)
  # Median mutations per sample non-synonymous
  median_mutsnon_synonymous <- median(HCC_Final_non_synonymous@variants.per.sample$Variants)
} else {
  cat("Data not properly loaded. Please check the data import step.")
}

All Mutations

60
Total Patients
299
Total Mutations
3.5
Median Mutations per Patient

Non-Intron Mutations

60
Patients with Non-Intron Mutations
153
Non-Intron Mutations
2
Median Non-Intron Mutations per Patient

Non-Synonymous Mutations

60
Patients with Non-Synonymous Mutations
63
Non-Synonymous Mutations
1
Median Non-Synonymous Mutations per Patient
if (requireNamespace("gtsummary", quietly = TRUE) && requireNamespace("DT", quietly = TRUE) && exists("HCC_Final")) {
  library(gtsummary)
  library(dplyr)
  library(DT)
  
  # Get all unique patients from the first Pre_TACE sample for each patient
  unique_patients <- HCC_Final@clinical.data %>%
    as.data.frame() %>%
    filter(Condition1 == "Pre_TACE") %>%
    distinct(Pateint, .keep_all = TRUE)
  
  # If not all patients have Pre_TACE samples, add their first available sample
  if (nrow(unique_patients) < length(unique(HCC_Final@clinical.data$Pateint))) {
    missing_patients <- setdiff(unique(HCC_Final@clinical.data$Pateint), unique_patients$Pateint)
    
    for (patient in missing_patients) {
      patient_samples <- HCC_Final@clinical.data[HCC_Final@clinical.data$Pateint == patient, ]
      unique_patients <- rbind(unique_patients, patient_samples[1, ])
    }
  }
  
  # Select relevant columns for summary
  clin_cols <- c("Age", "Gender", "Aetiology", "Viral_Not_Viral", "Cirrhosis", 
                 "CTP", "BCLC", "Tumour_Size", "Response", "OS_time",
                 "OS_Status", "PFS_time", "PFS_Status", "PS", "HAP", "PVT")
  
  # Create summary table for available columns
  available_cols <- intersect(clin_cols, colnames(unique_patients))
  
  if (length(available_cols) > 0) {
    # Create categorization for variables
    var_labels <- list(
      Age = "Age (years)",
      Gender = "Sex",
      Aetiology = "Etiology",
      Viral_Not_Viral = "Viral Status",
      Cirrhosis = "Cirrhosis",
      CTP = "Child-Turcotte-Pugh Class",
      BCLC = "Barcelona Clinic Liver Cancer Stage",
      Tumour_Size = "Tumor Size (cm)",
      Response = "Treatment Response",
      OS_time = "Overall Survival (months)",
      OS_Status = "Overall Survival Status",
      PFS_time = "Progression-Free Survival (months)",
      PFS_Status = "Progression Status",
      PS = "Performance Status",
      HAP = "Hepatoma Arterial-embolization Prognostic Score",
      PVT = "Portal Vein Thrombosis"
    )
    
    # Subset to available variables
    var_labels <- var_labels[names(var_labels) %in% available_cols]
    
    # Create summary statistics in an organized way
    tbl_summary_data <- unique_patients %>%
      select(all_of(available_cols)) %>%
      tbl_summary(
        by = NULL,
        label = var_labels,
        statistic = list(
          all_continuous() ~ "{median} ({p25}, {p75})",
          all_categorical() ~ "{n} ({p}%)"
        ),
        digits = all_continuous() ~ 1,
        missing = "no"
      ) %>%
      modify_header(label = "**Characteristic**", stat_0 = "**Value (N={N})**") %>%
      bold_labels() %>%
      as_gt() %>%
      gt::tab_header(title = "Clinical Characteristics of HCC Patients Treated with TACE")
    
    # Print the table
    tbl_summary_data
    
    # Create interactive table
    DT::datatable(
      unique_patients %>% select(all_of(available_cols)),
      options = list(
        pageLength = 10,
        scrollX = TRUE,
        dom = 'Blfrtip',
        buttons = c('copy', 'csv', 'excel'),
        columnDefs = list(
          list(className = 'dt-center', targets = "_all")
        )
      ),
      filter = 'top',
      class = 'cell-border stripe hover',
      caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: center; font-size: 16px; font-weight: bold; margin-top: 20px;',
        'Interactive Table: HCC Patient Data'
      )
    )
  } else {
    cat("Clinical data columns not found in the expected format.")
  }
} else {
  cat("Required packages (gtsummary, DT) not available or data not properly loaded.")
}

3 ) 🌱 Mutation Analysis.

3.1 Mutation Summary

3.1.1 All Mutations

if (exists("HCC_Final") && requireNamespace("maftools", quietly = TRUE)) {
  # Plot mutation summary
  par(mfrow = c(1, 1))
  tryCatch({
    plotmafSummary(
      maf = HCC_Final, 
      rmOutlier = TRUE, 
      dashboard = TRUE,
      addStat = 'median', 
      showBarcodes = TRUE, 
      fs = 1,
      color = fabcolors$Variant_Classification,
      log_scale = FALSE,
      textSize = 0.8,
      titleSize = c(1, 1)
    )
  }, error = function(e) {
    cat("Error in plotting MAF summary:", e$message, "\n")
    cat("Reverting to simplified plot.\n")
    
    # Simplified summary plot
    plotmafSummary(
      maf = HCC_Final,
      rmOutlier = TRUE,
      addStat = 'median'
    )
  })
}

3.1.2 Non-Intron Mutations

if (exists("HCC_Final_non_intron") && requireNamespace("maftools", quietly = TRUE)) {
  # Plot mutation summary
  par(mfrow = c(1, 1))
  tryCatch({
    # Create standard summary plot with larger text
    plotmafSummary(
      maf = HCC_Final_non_intron, 
      rmOutlier = TRUE, 
      dashboard = TRUE,
      addStat = 'median', 
      showBarcodes = TRUE, 
      fs = 0.9,             # Increase cex font size parameter for gene names
      color = fabcolors$Variant_Classification,
      log_scale = FALSE,
      textSize = 1.2,       # Increase text size for labels and annotations
      titleSize = c(1.5, 1.3)  # Increase title and subtitle sizes
    )
    
    # Additionally, create a separate bar plot specifically for the HCC genes of interest
    par(mfrow = c(1, 1))
    
    # Check which of your HCC genes actually have mutations in the dataset
    genes_in_data <- HCC_genes[HCC_genes %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)]
    
    if(length(genes_in_data) > 0) {
      # Generate a focused oncoplot just for your HCC genes
      cat("\nGenerating focused plot for HCC genes of interest:\n")
      
      # Set custom global plot parameters for larger text
      par(cex.axis = 1.2,    # Increase axis text size
          cex.lab = 1.4,     # Increase axis labels size
          cex.main = 1.5)    # Increase title size
      
      # Use only the supported parameters
      mafbarplot(
        maf = HCC_Final_non_intron,
        genes = HCC_genes,  # Use your specific HCC genes list
        fontSize = 0.9,     # Increase font size for gene names and percentages
        color = fabcolors$Variant_Classification,
        showPct = TRUE
      )
      
      cat("Displayed mutations for these HCC genes of interest:\n")
      cat(paste(genes_in_data, collapse = ", "), "\n")
    } else {
      cat("None of the specified HCC genes have mutations in this dataset.\n")
    }
  }, error = function(e) {
    cat("Error in plotting:", e$message, "\n")
    cat("Reverting to simplified plot.\n")
    
    # Simplified summary plot
    plotmafSummary(
      maf = HCC_Final_non_intron,
      rmOutlier = TRUE,
      addStat = 'median',
      textSize = 1.2         # Increase text size here too
    )
    
    # Try simplified gene-specific plot
    tryCatch({
      mafbarplot(
        maf = HCC_Final_non_intron,
        genes = HCC_genes,
        fontSize = 1.3       # Increase font size
      )
    }, error = function(e2) {
      cat("Could not generate HCC gene-specific plot:", e2$message, "\n")
    })
  })
}

## 
## Generating focused plot for HCC genes of interest:

## Displayed mutations for these HCC genes of interest:
## APC, ARID1A, ARID2, ATM, AXIN1, CDKN2A, CTNNB1, HNF1A, JAK1, KEAP1, LZTR1, PIK3CA, PTEN, RB1, SF3B1, SMARCA4, TERT, TP53
getSampleSummary(HCC_Final_non_intron)
# After generating your plots, save them with higher resolution
png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/HCC_mutation_summary.png", width = 12, height = 10, units = "in", res = 400)  # Increased size and resolution

# Set global parameters for saved plot
par(cex.axis = 1.2, cex.lab = 1.4, cex.main = 1.5)

# Here we need to regenerate the plot to capture it in the PNG
plotmafSummary(
  maf = HCC_Final_non_intron, 
  rmOutlier = TRUE, 
  dashboard = TRUE,
  addStat = 'median', 
  showBarcodes = TRUE, 
  fs = 1.2,               # Increased font size
  color = fabcolors$Variant_Classification,
  log_scale = FALSE,
  textSize = 1.2,         # Increased text size
  titleSize = c(1.5, 1.3) # Increased title sizes
)
dev.off()
## quartz_off_screen 
##                 2
# Save the HCC genes specific plot with higher resolution
if(exists("genes_in_data") && length(genes_in_data) > 0) {
  png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/HCC_genes_mutations.png", width = 12, height = 10, units = "in", res = 400)  # Increased size and resolution
  
  # Set global parameters for saved plot
  par(cex.axis = 1.2, cex.lab = 1.4, cex.main = 1.5, mar = c(5, 10, 4, 12))  # Increased margins
  
  mafbarplot(
    maf = HCC_Final_non_intron,
    genes = HCC_genes,
    fontSize = 1.3,        # Increased font size
    color = fabcolors$Variant_Classification,
    showPct = TRUE
  )
  
  # Add custom x-axis label with larger font
  title(xlab = "Number of alterations", cex.lab = 1.4, font.lab = 2)
  
  dev.off()
}
## quartz_off_screen 
##                 2

3.1.3 Non-Synonymous Mutations

if (exists("HCC_Final_non_synonymous") && requireNamespace("maftools", quietly = TRUE)) {
  # Plot mutation summary
  par(mfrow = c(1, 1))
  tryCatch({
    plotmafSummary(
      maf = HCC_Final_non_synonymous, 
      rmOutlier = TRUE, 
      dashboard = TRUE,
      addStat = 'median', 
      showBarcodes = TRUE, 
      fs = 1,
      color = fabcolors$Variant_Classification,
      log_scale = FALSE,
      textSize = 0.8,
      titleSize = c(1, 1)
    )
  }, error = function(e) {
    cat("Error in plotting MAF summary:", e$message, "\n")
    cat("Reverting to simplified plot.\n")
    
    # Simplified summary plot
    plotmafSummary(
      maf = HCC_Final_non_synonymous,
      rmOutlier = TRUE,
      addStat = 'median'
    )
  })
}

if (exists("HCC_Final_non_intron")) {
  # Create directory for publication figures
  dir.create("publication_figures", showWarnings = FALSE)
  
  # Define professional color palettes with brighter colors
  mutation_colors <- c(
    "Missense_Mutation" = "#4285F4",   # Blue
    "Frame_Shift_Del" = "#EA4335",     # Red
    "Nonsense_Mutation" = "#FBBC05",   # Yellow
    "Splice_Site" = "#34A853",         # Green
    "In_Frame_Del" = "#FF9900",        # Orange
    "Frame_Shift_Ins" = "#9C27B0",     # Purple
    "In_Frame_Ins" = "#00ACC1",        # Cyan
    "3'UTR" = "#8E24AA",               # Bright Purple
    "5'UTR" = "#FFD600",               # Bright Yellow
    "5'Flank" = "#795548",             # Brown
    "Silent" = "#BDBDBD"               # Gray
  )
  
  variant_colors <- c(
    "SNP" = "#FF5252",   # Red
    "DEL" = "#448AFF",   # Blue
    "INS" = "#66BB6A",   # Green
    "DNP" = "#FFC107",   # Amber
    "TNP" = "#7E57C2",   # Deep Purple
    "ONP" = "#26A69A"    # Teal
  )
  
  snv_colors <- c(
    "C>A" = "#27AE60",   # Green
    "C>G" = "#3498DB",   # Blue
    "C>T" = "#E74C3C",   # Red
    "T>A" = "#F39C12",   # Orange
    "T>C" = "#9B59B6",   # Purple
    "T>G" = "#2C3E50"    # Dark Blue
  )
  
  # -------------------------------------------------------------------------------
  # 1. VARIANT CLASSIFICATION PLOT
  # -------------------------------------------------------------------------------
  tryCatch({
    # Extract variant classification data
    vc_data <- as.data.frame(table(HCC_Final_non_intron@data$Variant_Classification))
    colnames(vc_data) <- c("Variant_Classification", "Frequency")
    vc_data <- vc_data[order(-vc_data$Frequency),]
    
    # Set up colors
    vc_colors <- rep("#1F77B4", nrow(vc_data))  # Default blue color
    for (i in 1:nrow(vc_data)) {
      if (vc_data$Variant_Classification[i] %in% names(mutation_colors)) {
        vc_colors[i] <- mutation_colors[as.character(vc_data$Variant_Classification[i])]
      }
    }
    
    # Create the plot
    png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Variant_Classification.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
    
    # Set up margins and text parameters
    par(mar = c(5, 12, 4, 4), mgp = c(3, 0.7, 0), 
        cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
        family = "sans")
    
    # Calculate x-axis limit with some padding
    x_max <- max(vc_data$Frequency) * 1.15
    
    # Create horizontal barplot with no borders
    barplot_result <- barplot(vc_data$Frequency, 
            horiz = TRUE, 
            names.arg = FALSE,
            col = vc_colors,
            border = NA,
            xlim = c(0, x_max),
            xlab = "", 
            main = "Variant Classification",
            axes = FALSE)
    
    # Add custom x-axis with gridlines
    axis(1, at = pretty(c(0, max(vc_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
    abline(v = pretty(c(0, max(vc_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
    
    # Add y-axis labels aligned right
    text(x = -max(vc_data$Frequency) * 0.05, 
         y = barplot_result, 
         labels = vc_data$Variant_Classification, 
         xpd = TRUE, 
         adj = 1,
         cex = 0.9)
    
    # Add count labels at the end of each bar
    text(x = vc_data$Frequency + max(vc_data$Frequency) * 0.02, 
         y = barplot_result, 
         labels = vc_data$Frequency, 
         adj = 0, 
         cex = 0.8)
    
    # Add x-axis label
    mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
    
    dev.off()
    cat("Created Variant_Classification.png\n")
  }, error = function(e) {
    cat("Error creating Variant Classification plot:", e$message, "\n")
  })
  
  # -------------------------------------------------------------------------------
  # 2. VARIANT TYPE PLOT
  # -------------------------------------------------------------------------------
  tryCatch({
    # Extract variant type data
    vt_data <- as.data.frame(table(HCC_Final_non_intron@data$Variant_Type))
    colnames(vt_data) <- c("Variant_Type", "Frequency")
    vt_data <- vt_data[order(-vt_data$Frequency),]
    
    # Set up colors
    vt_colors <- rep("#1F77B4", nrow(vt_data))  # Default blue color
    for (i in 1:nrow(vt_data)) {
      if (vt_data$Variant_Type[i] %in% names(variant_colors)) {
        vt_colors[i] <- variant_colors[as.character(vt_data$Variant_Type[i])]
      }
    }
    
    # Create the plot
    png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Variant_Type.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
    
    # Set up margins and text parameters
    par(mar = c(5, 10, 4, 4), mgp = c(3, 0.7, 0), 
        cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
        family = "sans")
    
    # Calculate x-axis limit with some padding
    x_max <- max(vt_data$Frequency) * 1.15
    
    # Create horizontal barplot with no borders
    barplot_result <- barplot(vt_data$Frequency, 
            horiz = TRUE, 
            names.arg = FALSE,
            col = vt_colors,
            border = NA,
            xlim = c(0, x_max),
            xlab = "", 
            main = "Variant Type",
            axes = FALSE)
    
    # Add custom x-axis with gridlines
    axis(1, at = pretty(c(0, max(vt_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
    abline(v = pretty(c(0, max(vt_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
    
    # Add y-axis labels aligned right
    text(x = -max(vt_data$Frequency) * 0.05, 
         y = barplot_result, 
         labels = vt_data$Variant_Type, 
         xpd = TRUE, 
         adj = 1,
         cex = 0.9)
    
    # Add count labels at the end of each bar
    text(x = vt_data$Frequency + max(vt_data$Frequency) * 0.02, 
         y = barplot_result, 
         labels = vt_data$Frequency, 
         adj = 0, 
         cex = 0.8)
    
    # Add x-axis label
    mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
    
    dev.off()
    cat("Created Variant_Type.png\n")
  }, error = function(e) {
    cat("Error creating Variant Type plot:", e$message, "\n")
  })
  
  # -------------------------------------------------------------------------------
  # 3. SNV CLASS PLOT
  # -------------------------------------------------------------------------------
  tryCatch({
    # Try to extract SNV data from Variant_Type == "SNP"
    snp_data <- HCC_Final_non_intron@data[HCC_Final_non_intron@data$Variant_Type == "SNP", ]
    
    if (nrow(snp_data) > 0 && all(c("Reference_Allele", "Tumor_Seq_Allele2") %in% colnames(snp_data))) {
      # Create SNV classification
      snp_data$SNV_Class <- paste0(snp_data$Reference_Allele, ">", snp_data$Tumor_Seq_Allele2)
      
      # Keep only standard classes
      std_classes <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
      snv_counts <- table(factor(snp_data$SNV_Class, levels = std_classes))
      
      # Create data frame from counts
      snv_data <- data.frame(
        SNV_Class = names(snv_counts),
        Frequency = as.numeric(snv_counts)
      )
      snv_data <- snv_data[snv_data$Frequency > 0, ]
      snv_data <- snv_data[order(-snv_data$Frequency), ]
      
      # Set up colors
      plot_colors <- rep("#1F77B4", nrow(snv_data))  # Default blue color
      for (i in 1:nrow(snv_data)) {
        if (snv_data$SNV_Class[i] %in% names(snv_colors)) {
          plot_colors[i] <- snv_colors[as.character(snv_data$SNV_Class[i])]
        }
      }
      
      # Create the plot
      png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/SNV_Class.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
      
      # Set up margins and text parameters
      par(mar = c(5, 10, 4, 4), mgp = c(3, 0.7, 0), 
          cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
          family = "sans")
      
      # Calculate x-axis limit with some padding
      x_max <- max(snv_data$Frequency) * 1.15
      
      # Create horizontal barplot with no borders
      barplot_result <- barplot(snv_data$Frequency, 
              horiz = TRUE, 
              names.arg = FALSE,
              col = plot_colors,
              border = NA,
              xlim = c(0, x_max),
              xlab = "", 
              main = "SNV Class",
              axes = FALSE)
      
      # Add custom x-axis with gridlines
      axis(1, at = pretty(c(0, max(snv_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
      abline(v = pretty(c(0, max(snv_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
      
      # Add y-axis labels aligned right
      text(x = -max(snv_data$Frequency) * 0.05, 
           y = barplot_result, 
           labels = snv_data$SNV_Class, 
           xpd = TRUE, 
           adj = 1,
           cex = 0.9)
      
      # Add count labels at the end of each bar
      text(x = snv_data$Frequency + max(snv_data$Frequency) * 0.02, 
           y = barplot_result, 
           labels = snv_data$Frequency, 
           adj = 0, 
           cex = 0.8)
      
      # Add x-axis label
      mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
      
      dev.off()
      cat("Created SNV_Class.png\n")
    } else {
      # Try using maftools titv if available
      if ("titv" %in% ls(envir = asNamespace("maftools"), all.names = TRUE)) {
        png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/SNV_Class.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
        titv_result <- titv(maf = HCC_Final_non_intron, plot = FALSE)
        plotTiTv(res = titv_result, fontSize = 1.2, mainLabelSize = 1.5)
        dev.off()
        cat("Created SNV_Class.png using maftools' titv function\n")
      } else {
        cat("Could not generate SNV Class plot: insufficient data\n")
      }
    }
  }, error = function(e) {
    cat("Error creating SNV Class plot:", e$message, "\n")
    
    # Fallback to simpler approach
    tryCatch({
      png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/SNV_Class.png", width = 8, height = 6, units = "in", res = 300, bg = "white")
      par(mar = c(5, 5, 4, 2))
      plotTiTv(res = titv(maf = HCC_Final_non_intron, returnAll = TRUE), fontSize = 1.2)
      dev.off()
    }, error = function(e2) {
      cat("Could not create SNV Class plot using alternative method:", e2$message, "\n")
    })
  })
  
  # -------------------------------------------------------------------------------
  # 4. COMBINED PLOT WITH PANEL LABELS
  # -------------------------------------------------------------------------------
  tryCatch({
    png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Combined_Mutation_Summary.png", width = 18, height = 6, units = "in", res = 300, bg = "white")
    
    # Create a 1x3 layout
    layout(matrix(1:3, nrow = 1), widths = c(1, 1, 1))
    
    # 1. Variant Classification
    vc_data <- as.data.frame(table(HCC_Final_non_intron@data$Variant_Classification))
    colnames(vc_data) <- c("Variant_Classification", "Frequency")
    vc_data <- vc_data[order(-vc_data$Frequency),]
    
    vc_colors <- rep("#1F77B4", nrow(vc_data))  # Default blue color
    for (i in 1:nrow(vc_data)) {
      if (vc_data$Variant_Classification[i] %in% names(mutation_colors)) {
        vc_colors[i] <- mutation_colors[as.character(vc_data$Variant_Classification[i])]
      }
    }
    
    # Set up margins and text parameters
    par(mar = c(5, 12, 4, 2), mgp = c(3, 0.7, 0), 
        cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
        family = "sans")
    
    # Calculate x-axis limit with some padding
    x_max <- max(vc_data$Frequency) * 1.15
    
    # Create horizontal barplot with no borders
    barplot_result <- barplot(vc_data$Frequency, 
            horiz = TRUE, 
            names.arg = FALSE,
            col = vc_colors,
            border = NA,
            xlim = c(0, x_max),
            xlab = "", 
            main = "Variant Classification",
            axes = FALSE)
    
    # Add custom x-axis with gridlines
    axis(1, at = pretty(c(0, max(vc_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
    abline(v = pretty(c(0, max(vc_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
    
    # Add y-axis labels aligned right
    text(x = -max(vc_data$Frequency) * 0.05, 
         y = barplot_result, 
         labels = vc_data$Variant_Classification, 
         xpd = TRUE, 
         adj = 1,
         cex = 0.9)
    
    # Add count labels at the end of each bar
    text(x = vc_data$Frequency + max(vc_data$Frequency) * 0.02, 
         y = barplot_result, 
         labels = vc_data$Frequency, 
         adj = 0, 
         cex = 0.8)
    
    # Add x-axis label
    mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
    
    # Add panel label
    text(x = -max(vc_data$Frequency) * 0.1, y = max(barplot_result) * 1.15, 
         labels = "A", font = 2, cex = 1.5, xpd = TRUE)
    
    # 2. Variant Type
    vt_data <- as.data.frame(table(HCC_Final_non_intron@data$Variant_Type))
    colnames(vt_data) <- c("Variant_Type", "Frequency")
    vt_data <- vt_data[order(-vt_data$Frequency),]
    
    vt_colors <- rep("#1F77B4", nrow(vt_data))  # Default blue color
    for (i in 1:nrow(vt_data)) {
      if (vt_data$Variant_Type[i] %in% names(variant_colors)) {
        vt_colors[i] <- variant_colors[as.character(vt_data$Variant_Type[i])]
      }
    }
    
    # Set up margins and text parameters
    par(mar = c(5, 10, 4, 2), mgp = c(3, 0.7, 0), 
        cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
        family = "sans")
    
    # Calculate x-axis limit with some padding
    x_max <- max(vt_data$Frequency) * 1.15
    
    # Create horizontal barplot with no borders
    barplot_result <- barplot(vt_data$Frequency, 
            horiz = TRUE, 
            names.arg = FALSE,
            col = vt_colors,
            border = NA,
            xlim = c(0, x_max),
            xlab = "", 
            main = "Variant Type",
            axes = FALSE)
    
    # Add custom x-axis with gridlines
    axis(1, at = pretty(c(0, max(vt_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
    abline(v = pretty(c(0, max(vt_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
    
    # Add y-axis labels aligned right
    text(x = -max(vt_data$Frequency) * 0.05, 
         y = barplot_result, 
         labels = vt_data$Variant_Type, 
         xpd = TRUE, 
         adj = 1,
         cex = 0.9)
    
    # Add count labels at the end of each bar
    text(x = vt_data$Frequency + max(vt_data$Frequency) * 0.02, 
         y = barplot_result, 
         labels = vt_data$Frequency, 
         adj = 0, 
         cex = 0.8)
    
    # Add x-axis label
    mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
    
    # Add panel label
    text(x = -max(vt_data$Frequency) * 0.1, y = max(barplot_result) * 1.15, 
         labels = "B", font = 2, cex = 1.5, xpd = TRUE)
    
    # 3. SNV Class
    snp_data <- HCC_Final_non_intron@data[HCC_Final_non_intron@data$Variant_Type == "SNP", ]
    
    if (nrow(snp_data) > 0 && all(c("Reference_Allele", "Tumor_Seq_Allele2") %in% colnames(snp_data))) {
      snp_data$SNV_Class <- paste0(snp_data$Reference_Allele, ">", snp_data$Tumor_Seq_Allele2)
      std_classes <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
      snv_counts <- table(factor(snp_data$SNV_Class, levels = std_classes))
      
      snv_data <- data.frame(
        SNV_Class = names(snv_counts),
        Frequency = as.numeric(snv_counts)
      )
      snv_data <- snv_data[snv_data$Frequency > 0, ]
      snv_data <- snv_data[order(-snv_data$Frequency), ]
      
      plot_colors <- rep("#1F77B4", nrow(snv_data))  # Default blue color
      for (i in 1:nrow(snv_data)) {
        if (snv_data$SNV_Class[i] %in% names(snv_colors)) {
          plot_colors[i] <- snv_colors[as.character(snv_data$SNV_Class[i])]
        }
      }
      
      # Set up margins and text parameters
      par(mar = c(5, 10, 4, 2), mgp = c(3, 0.7, 0), 
          cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
          family = "sans")
      
      # Calculate x-axis limit with some padding
      x_max <- max(snv_data$Frequency) * 1.15
      
      # Create horizontal barplot with no borders
      barplot_result <- barplot(snv_data$Frequency, 
              horiz = TRUE, 
              names.arg = FALSE,
              col = plot_colors,
              border = NA,
              xlim = c(0, x_max),
              xlab = "", 
              main = "SNV Class",
              axes = FALSE)
      
      # Add custom x-axis with gridlines
      axis(1, at = pretty(c(0, max(snv_data$Frequency))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
      abline(v = pretty(c(0, max(snv_data$Frequency))), col = "lightgray", lty = "dotted", lwd = 0.7)
      
      # Add y-axis labels aligned right
      text(x = -max(snv_data$Frequency) * 0.05, 
           y = barplot_result, 
           labels = snv_data$SNV_Class, 
           xpd = TRUE, 
           adj = 1,
           cex = 0.9)
      
      # Add count labels at the end of each bar
      text(x = snv_data$Frequency + max(snv_data$Frequency) * 0.02, 
           y = barplot_result, 
           labels = snv_data$Frequency, 
           adj = 0, 
           cex = 0.8)
      
      # Add x-axis label
      mtext("Frequency", side = 1, line = 2.5, cex = 0.9)
      
      # Add panel label
      text(x = -max(snv_data$Frequency) * 0.1, y = max(barplot_result) * 1.15, 
           labels = "C", font = 2, cex = 1.5, xpd = TRUE)
    } else {
      # Just leave this panel blank if we can't create SNV plot
      par(mar = c(5, 10, 4, 2))
      plot(1, type = "n", axes = FALSE, xlab = "", ylab = "", main = "SNV Class")
      text(1, 1, "SNV Class data not available", cex = 1.2)
      
      # Add panel label
      text(0.5, 1.8, labels = "C", font = 2, cex = 1.5, xpd = TRUE)
    }
    
    dev.off()
    cat("Created Combined_Mutation_Summary.png\n")
  }, error = function(e) {
    cat("Error creating combined plot:", e$message, "\n")
  })
  
  # -------------------------------------------------------------------------------
  # 5. TOP MUTATED GENES PLOT
  # -------------------------------------------------------------------------------
  tryCatch({
    # Determine which genes to use
    top_genes <- NULL
    
    # If HCC_genes exists, use those genes
    if (exists("HCC_genes") && length(HCC_genes) > 0) {
      genes_in_data <- HCC_genes[HCC_genes %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)]
      if (length(genes_in_data) > 0) {
        top_genes <- genes_in_data
      }
    }
    
    # If no specific HCC genes or none found, use top 10 most mutated
    if (is.null(top_genes)) {
      gene_counts <- table(HCC_Final_non_intron@data$Hugo_Symbol)
      gene_counts <- sort(gene_counts, decreasing = TRUE)
      top_genes <- names(gene_counts)[1:min(10, length(gene_counts))]
    }
    
    # Count mutations per gene and get mutation types
    gene_counts <- numeric(length(top_genes))
    gene_mut_types <- list()
    
    for (i in 1:length(top_genes)) {
      gene <- top_genes[i]
      gene_data <- HCC_Final_non_intron@data[HCC_Final_non_intron@data$Hugo_Symbol == gene, ]
      gene_counts[i] <- nrow(gene_data)
      
      # Count mutation types
      if (nrow(gene_data) > 0) {
        mut_types <- table(gene_data$Variant_Classification)
        gene_mut_types[[gene]] <- mut_types
      }
    }
    
    # Order genes by mutation count
    gene_order <- order(-gene_counts)
    gene_counts <- gene_counts[gene_order]
    top_genes <- top_genes[gene_order]
    
    # Calculate percentages
    total_samples <- length(unique(HCC_Final_non_intron@data$Tumor_Sample_Barcode))
    gene_percentages <- paste0(round(gene_counts / total_samples * 100, 0), "%")
    
    # Create the plot
    png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Top_Mutated_Genes.png", width = 10, height = 8, units = "in", res = 300, bg = "white")
    
    # Set up the plot margins for horizontal bars
    par(mar = c(5, 8, 4, 4), mgp = c(3, 0.7, 0), 
        cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
        family = "sans")
    
    # Calculate x-axis limit with some padding
    x_max <- max(gene_counts) * 1.3
    
    # Create horizontal barplot
    barplot_result <- barplot(gene_counts, 
            horiz = TRUE, 
            names.arg = FALSE,
            col = "#4682B4",
            border = NA,
            xlim = c(0, x_max),
            axes = FALSE,
            main = "Top 10 mutated genes")
    
    # Add custom x-axis with gridlines
    axis(1, at = pretty(c(0, max(gene_counts))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
    abline(v = pretty(c(0, max(gene_counts))), col = "lightgray", lty = "dotted", lwd = 0.7)
    
    # Add y-axis labels (gene names in italics)
    text(x = -max(gene_counts) * 0.1, 
         y = barplot_result, 
         labels = top_genes, 
         xpd = TRUE, 
         adj = 1,
         cex = 0.9,
         font = 3)  # 3 = italic
    
    # Add percentage labels
    text(x = gene_counts + max(gene_counts) * 0.05, 
         y = barplot_result, 
         labels = gene_percentages, 
         xpd = TRUE, 
         adj = 0,
         cex = 0.9,
         font = 2)  # 2 = bold
    
    # Add x-axis label
    mtext("Number of mutations", side = 1, line = 2.5, cex = 0.9)
    
    dev.off()
    cat("Created Top_Mutated_Genes.png\n")
    
    # Try to create a stacked bar version if we have mutation type data
    if (length(gene_mut_types) > 0) {
      tryCatch({
        # Set up mutation type colors
        mut_type_colors <- mutation_colors
        
        # Create the plot
        png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/Top_Mutated_Genes_Stacked.png", width = 12, height = 8, units = "in", res = 300, bg = "white")
        
        # Set up the plot area
        par(mar = c(5, 9, 4, 10), mgp = c(3, 0.7, 0), 
            cex.axis = 0.9, cex.lab = 1, cex.main = 1.3,
            family = "sans")
        
        # Create empty plot area
        plot(0, 0, type = "n", 
             xlim = c(0, max(gene_counts) * 1.3), 
             ylim = c(0.5, length(top_genes) + 0.5),
             xlab = "", ylab = "", axes = FALSE,
             main = "Top mutated genes by mutation type")
        
        # Add custom x-axis with gridlines
        axis(1, at = pretty(c(0, max(gene_counts))), lwd = 0.5, tck = -0.01, cex.axis = 0.9)
        abline(v = pretty(c(0, max(gene_counts))), col = "lightgray", lty = "dotted", lwd = 0.7)
        
        # Add x-axis label
        mtext("Number of mutations", side = 1, line = 2.5, cex = 0.9)
        
        # Add gene names on y-axis
        text(x = -max(gene_counts) * 0.05, 
             y = length(top_genes):1, 
             labels = top_genes, 
             xpd = TRUE, 
             adj = 1,
             cex = 0.9,
             font = 3)  # 3 = italic
        
        # Add percentage labels
        text(x = gene_counts + max(gene_counts) * 0.05, 
             y = length(top_genes):1, 
             labels = gene_percentages, 
             xpd = TRUE, 
             adj = 0,
             cex = 0.9,
             font = 2)  # 2 = bold
        
        # Create stacked bars
        for (i in 1:length(top_genes)) {
          gene <- top_genes[i]
          if (gene %in% names(gene_mut_types)) {
            mut_counts <- gene_mut_types[[gene]]
            
            # Start position for each segment
            x_start <- 0
            
            # Sort mutation types
            mut_counts <- sort(mut_counts, decreasing = TRUE)
            
            for (mut_type in names(mut_counts)) {
              count <- mut_counts[mut_type]
              
              # Choose color
              if (mut_type %in% names(mut_type_colors)) {
                color <- mut_type_colors[mut_type]
              } else {
                color <- "#CCCCCC"  # Gray for unknown types
              }
              
              # Draw rectangle for this mutation type
              rect(x_start, length(top_genes) - i + 0.7, 
                   x_start + count, length(top_genes) - i + 1.3,
                   col = color, border = NA)
              
              # Move x_start for next segment
              x_start <- x_start + count
            }
          }
        }
        
        # Add legend
        all_mut_types <- unique(unlist(lapply(gene_mut_types, names)))
        legend_mut_types <- intersect(all_mut_types, names(mut_type_colors))
        
        if (length(legend_mut_types) > 0) {
          legend_colors <- mut_type_colors[legend_mut_types]
          
          legend("topright", 
                 legend = legend_mut_types,
                 fill = legend_colors,
                 border = NA,
                 bty = "n",
                 cex = 0.8,
                 xpd = TRUE,
                 inset = c(-0.25, 0))
        }
        
        dev.off()
        cat("Created Top_Mutated_Genes_Stacked.png\n")
      }, error = function(e) {
        cat("Error creating stacked bar plot:", e$message, "\n")
      })
    }
  }, error = function(e) {
    cat("Error creating Top Mutated Genes plot:", e$message, "\n")
  })
  
  cat("\nAll annotated plots have been saved to the 'publication_figures' directory.\n")
}
## Created Variant_Classification.png
## Created Variant_Type.png
## Created SNV_Class.png
## Created Combined_Mutation_Summary.png
## Created Top_Mutated_Genes.png
## Created Top_Mutated_Genes_Stacked.png
## 
## All annotated plots have been saved to the 'publication_figures' directory.

3.2 Oncoplot (Waterfall Plot)

if (exists("HCC_Final_non_intron") && exists("HCC_genes")) {
  # Generate oncoplot for HCC genes
  tryCatch({
    oncoplot(
      maf = HCC_Final_non_intron, 
      genes = HCC_genes,
      clinicalFeatures = c('Aetiology', 'BCLC', 'Cirrhosis'),
      sortByAnnotation = TRUE,
      annotationColor = NULL,
      removeNonMutated = FALSE,
      showTumorSampleBarcodes = TRUE,
      fontSize = 0.7,
      SampleNamefontSize = 0.8,
      legendFontSize = 1,
      annotationFontSize = 0.8,
      showTitle = TRUE,
      titleText = "Mutation Profile of Key HCC Genes",
      showPct = TRUE
    )
  }, error = function(e) {
    cat("Error in generating oncoplot:", e$message, "\n")
    cat("Trying with fewer options.\n")
    
    # Simplified oncoplot
    oncoplot(
      maf = HCC_Final_non_intron,
      genes = HCC_genes,
      showTumorSampleBarcodes = TRUE
    )
  })
}

# ------------------------------------------------------------------
# Extended oncoplot – top 18 genes with detailed clinical tracks
# ------------------------------------------------------------------
if (exists("HCC_Final_non_intron")) {

  #--- make sure the clinical annotation names really exist -----------
  anno_cols <- colnames(HCC_Final_non_intron@clinical.data)

  # Use the long name only if it is available, otherwise fall back to BCLC
  clin_feats <- c(
    'Aetiology',
    'Condition1',
    if ('Barcelona_Clinic_Liver_Cancer' %in% anno_cols) {
      'Barcelona_Clinic_Liver_Cancer'
    } else if ('BCLC' %in% anno_cols) {
      'BCLC'
    } else {
      NULL
    }
  )

  #--------------------------------------------------------------------
  tryCatch({

    oncoplot(
      maf                 = HCC_Final_non_intron,
      top                 = 18,
      genes               = c("APC", "ARID1A", "ARID2", "ATM", "AXIN1", "CDKN2A",
                              "CTNNB1", "HNF1A", "JAK1", "KEAP1", "LZTR1",
                              "PIK3CA", "PTEN", "RB1", "SF3B1", "SMARCA4",
                              "TERT", "TP53"),
      altered             = FALSE,
      drawRowBar          = TRUE,
      drawColBar          = TRUE,
      includeColBarCN     = TRUE,
      clinicalFeatures    = clin_feats,
      annotationColor     = NULL,
      draw_titv           = FALSE,
      showTumorSampleBarcodes = TRUE,
      barcodeSrt          = 90,
      gene_mar            = 5,
      anno_height         = 1,
      legend_height       = 4,
      sortByAnnotation    = FALSE,
      groupAnnotationBySize = TRUE,
      sortByMutation      = FALSE,
      keepGeneOrder       = FALSE,
      GeneOrderSort       = TRUE,
      removeNonMutated    = TRUE,
      bgCol               = "#CCCCCC",
      borderCol           = "white",
      fontSize            = 0.7,
      SampleNamefontSize  = 0.9,
      titleFontSize       = 1.5,
      legendFontSize      = 1.2,
      annotationFontSize  = 1.2,
      showTitle           = TRUE,
      titleText           = "Top 18 Mutated Genes – Detailed Oncoplot",
      showPct             = TRUE
    )

  }, error = function(e) {
    message("Oncoplot (top 18) could not be generated: ", e$message)
  })
}

3.3 Transition and Transversions

laml.titv = titv(maf = HCC_Final_non_intron, plot = TRUE, useSyn = TRUE)

#plot titv summary
plotTiTv(
  res = laml.titv,
  plotType = "both",
  sampleOrder = HCC_Final_non_intron@clinical.data$Tumor_Sample_Barcode,
  color = NULL,
  showBarcodes = TRUE,
  textSize = 0.8,
  baseFontSize = 1,
  axisTextSize = c(1, 1),
  plotNotch = FALSE
)

3.4 Lollipop Plots for Key Genes

3.4.1 TP53

if (exists("HCC_Final_non_intron")) {
  # Create lollipop plot for TP53
  if ("TP53" %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)) {
    tryCatch({
      lollipopPlot(
        maf = HCC_Final_non_intron,
        gene = "TP53",
        AACol = "Protein_Change",
        showMutationRate = TRUE,
        labelPos = "all"
      )
    }, error = function(e) {
      cat("Error in generating TP53 lollipop plot:", e$message, "\n")
      cat("TP53 mutations are present but plot could not be generated.\n")
    })
  } else {
    cat("No TP53 mutations found in this dataset.\n")
  }
}
##      HGNC    refseq.ID   protein.ID aa.length
##    <char>       <char>       <char>     <num>
## 1:   TP53    NM_000546    NP_000537       393
## 2:   TP53 NM_001126112 NP_001119584       393
## 3:   TP53 NM_001126113 NP_001119585       346
## 4:   TP53 NM_001126114 NP_001119586       341
## 5:   TP53 NM_001126115 NP_001119587       261
## 6:   TP53 NM_001126116 NP_001119588       209
## 7:   TP53 NM_001126117 NP_001119589       214
## 8:   TP53 NM_001126118 NP_001119590       354

3.4.2 CTNNB1

if (exists("HCC_Final_non_intron")) {
  # Create lollipop plot for CTNNB1
  if ("CTNNB1" %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)) {
    tryCatch({
      lollipopPlot(
        maf = HCC_Final_non_intron,
        gene = "CTNNB1",
        AACol = "Protein_Change",
        showMutationRate = TRUE,
        labelPos = "all"
      )
    }, error = function(e) {
      cat("Error in generating CTNNB1 lollipop plot:", e$message, "\n")
      cat("CTNNB1 mutations are present but plot could not be generated.\n")
    })
  } else {
    cat("No CTNNB1 mutations found in this dataset.\n")
  }
}
##      HGNC    refseq.ID   protein.ID aa.length
##    <char>       <char>       <char>     <num>
## 1: CTNNB1 NM_001098209 NP_001091679       781
## 2: CTNNB1 NM_001098210 NP_001091680       781
## 3: CTNNB1    NM_001904    NP_001895       781

3.4.3 TERT

if (exists("HCC_Final_non_intron")) {
  # Create lollipop plot for TERT
  if ("TERT" %in% unique(HCC_Final_non_intron@data$Hugo_Symbol)) {
    tryCatch({
      lollipopPlot(
        maf = HCC_Final_non_intron,
        gene = "TERT",
        AACol = "Protein_Change",
        showMutationRate = TRUE,
        labelPos = "all"
      )
    }, error = function(e) {
      cat("Error in generating TERT lollipop plot:", e$message, "\n")
      cat("TERT mutations are present but plot could not be generated.\n")
    })
  } else {
    cat("No TERT mutations found in this dataset.\n")
  }
}
##      HGNC    refseq.ID   protein.ID aa.length
##    <char>       <char>       <char>     <num>
## 1:   TERT NM_001193376 NP_001180305      1069
## 2:   TERT    NM_198253    NP_937983      1132

3.5 Mutation Co-occurrence and Exclusivity

png("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/somatic_interactions.png", width = 12, height = 9, units = "in", res = 300)
par(mar = c(5, 8, 4, 2))  # Increase left margin (2nd value)

somaticInteractions(
  maf = HCC_Final_non_intron,
  genes = HCC_genes,
  pvalue = c(0.05, 0.1),
  leftMar = 8,
  topMar = 8,# Default is 4, increase to 8 or more
  fontSize = 1,
  showSigSymbols = TRUE,
  returnAll = FALSE  # Make sure this is FALSE to get a plot instead of data
)
dev.off()
## quartz_off_screen 
##                 2

3.6 Plotting VAF

# Define the list of HCC genes to include
HCC_genes <- c(
  "APC", "ARID1A", "ARID2", "ATM", "AXIN1", "CDKN2A", "CTNNB1", "HNF1A", 
  "JAK1", "KEAP1", "LZTR1", "PIK3CA", "PTEN", "RB1", "SF3B1", 
  "SMARCA4", "TERT", "TP53"
)

# Define the extract_vaf_data function
extract_vaf_data <- function(maf_object, group_label) {
  # Extract the variants data from the MAF object
  variants <- maf_object@data
  
  # Select relevant columns and add group label
  result <- variants %>%
    select(Hugo_Symbol, tumor_f) %>%
    mutate(Group = group_label)
  
  return(result)
}

# Extract and process data as before
pre_data <- extract_vaf_data(pre_tace_samples, "Pre-TACE")
post_data <- extract_vaf_data(post_tace_samples, "Post-TACE")

# Combine the data
combined_data <- bind_rows(pre_data, post_data)

# Filter out NA values and restrict to only the specified HCC genes
combined_data <- combined_data %>%
  filter(!is.na(tumor_f)) %>%
  filter(Hugo_Symbol != "NA" & !is.na(Hugo_Symbol) & Hugo_Symbol != "") %>%
  filter(Hugo_Symbol %in% HCC_genes)  # Add this line to filter for HCC genes only

# Important: Set factor levels to control the order
combined_data$Group <- factor(combined_data$Group, levels = c("Pre-TACE", "Post-TACE"))
# Set gene order to match the original vector order
combined_data$Hugo_Symbol <- factor(combined_data$Hugo_Symbol, levels = HCC_genes)

# Create side-by-side boxplots with Pre-TACE first
vaf_plot_ordered <- ggplot(combined_data, aes(x = Hugo_Symbol, y = tumor_f, fill = Group)) +
  geom_boxplot(position = position_dodge(width = 0.8), width = 0.7, outlier.size = 0.5, na.rm = TRUE) +
  scale_fill_manual(values = c("Pre-TACE" = "#3498db", "Post-TACE" = "#e74c3c")) +
  labs(
    title = "VAF Comparison in HCC-Related Genes",
    x = "Gene",
    y = "Variant Allele Frequencies (VAF)",
    fill = ""
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
    axis.text.y = element_text(size = 10),
    legend.position = "top",
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# Save the plot
ggsave("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/pre_post_vaf_comparison_hcc_genes.png", vaf_plot_ordered, width = 10, height = 6)

# For the stacked vertical panels version, the same factor setting ensures correct order
vaf_plot_stacked <- ggplot(combined_data, aes(x = Hugo_Symbol, y = tumor_f, fill = Group)) +
  geom_boxplot(outlier.size = 0.5) +
  scale_fill_manual(values = c("Pre-TACE" = "#3498db", "Post-TACE" = "#e74c3c")) +
  facet_grid(Group ~ ., scales = "free_y") +  # Pre-TACE will now appear on top
  labs(
    title = "VAF Comparison in HCC-Related Genes",
    x = "Gene",
    y = "Variant Allele Frequencies (VAF)",
    fill = ""
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
    axis.text.y = element_text(size = 10),
    strip.text = element_text(size = 12, face = "bold"),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# Save the plots
ggsave("/Users/sultanalharbi/Library/CloudStorage/OneDrive-Personal/Projects/Thesis_Chapters/Chapter 1 (cfDNA-TACE for HCC)/TACE_Results/vaf_comparison_stacked_panels_hcc_genes.png", vaf_plot_stacked, width = 10, height = 8)
print(vaf_plot_ordered)

print(vaf_plot_stacked)

3.7 Compare Two Cohorts (MAF)

# Enhanced wrapper function that completely rewrites the forestPlot function
# but removes sample counts from the title and shows actual p-values as decimals
forestPlot_noSamples <- function(mafCompareRes, pVal = 0.05, fdr = NULL,
                                 color=c('maroon','royalblue'),
                                 geneFontSize = 0.8, titleSize = 1.2, lineWidth = 1){
  
  if(is.null(fdr)){
    m.sigs = mafCompareRes$results[pval < pVal]
  }else{
    m.sigs = mafCompareRes$results[adjPval < fdr]
  }
  
  m1Name = mafCompareRes$SampleSummary[1, Cohort]
  m2Name = mafCompareRes$SampleSummary[2, Cohort]
  
  m1.sampleSize = mafCompareRes$SampleSummary[1, SampleSize]
  m2.sampleSize = mafCompareRes$SampleSummary[2, SampleSize]
  
  # Handle color parameters
  if (length(color)==1){
    vc_col = c(color,color)
  }else if(length(color==2)){
    vc_col = color
  }else{
    stop('colors length must be less equal than 2')
  }
  
  if (is.null(names(vc_col))){
    names(vc_col)=c(m2Name,m1Name)
  }else if (!(m1Name %in% names(vc_col)) && !(m2Name %in% names(vc_col))){
    stop(paste0('\ninput named vector [color] must contain both group name, \nwhich should be like c(',
                m1Name,', ', m2Name,') but now are ',list(names(vc_col)),'\n'))
  }else if (!(m1Name %in% names(vc_col)) || !(m2Name %in% names(vc_col))){
    stop(paste0('\nif you pass [color] with named vector, then both of the names matching group names must be Explicitly declared,\n',
                'which should be like c(',
                m1Name,', ', m2Name,') but now are ',list(names(vc_col)),'\n'))
  }
  
  if(nrow(m.sigs) < 1){
    stop('No differetially mutated genes found !')
  }
  
  m.sigs$Hugo_Symbol = factor(x = m.sigs$Hugo_Symbol, levels = rev(m.sigs$Hugo_Symbol))
  
  m.sigs$or_new = ifelse(test = m.sigs$or > 3, yes = 3, no = m.sigs$or)
  m.sigs$upper = ifelse(test = m.sigs$ci.up > 3, yes = 3, no = m.sigs$ci.up)
  m.sigs$lower = ifelse(test = m.sigs$ci.low > 3, yes = 3, no = m.sigs$ci.low)
  #Reverse by significance
  m.sigs$pos = rev(1:nrow(m.sigs))
  m.sigs = m.sigs[order(pos)]
  m.sigs$pos = 1:nrow(m.sigs)
  xlims = c(0, 4)
  ylims = c(0.75, nrow(m.sigs))
  
  graphics::layout(mat = matrix(c(1, 2, 3, 4, 5, 6, 6, 6, 6, 6), byrow = TRUE, ncol = 5, nrow = 2), widths = c(4, 1, 1), heights = c(6, 1.2))
  par(mar = c(3, 1, 3, 5))
  plot(NA, xlim = xlims, ylim = ylims, axes= FALSE, xlab = NA, ylab = NA)
  
  apply(m.sigs[,.(or, ci.up, ci.low, ci.up, or_new, upper, lower, pos)], 1, function(x){
    p = x[5]; u = x[6]; u_orig = x[2]; l = x[7]; l_orig = x[3]; ypos = x[8]
    if (p<1){
      linecolor = vc_col[m2Name]
    }else if(p>1){
      linecolor = vc_col[m1Name]
    }else{
      linecolor = 'black'
    }
    points(x = p, y = ypos, pch = 16, cex = 1.1*(lineWidth))
    segments(x0 = l, y0 = ypos, x1 = u, y1 = ypos, lwd = lineWidth,col = linecolor)
    
    if(u_orig >3){
      segments(x0 = 3, y0 = ypos, x1 = 3.25, y1 = ypos, lwd = lineWidth,col=linecolor)
      points(x = 3.25, y = ypos, pch = ">", cex = 1.1*(lineWidth))
    }
    if(l_orig >3){
      segments(x0 = 3, y0 = ypos, x1 = 3.25, y1 = ypos, lwd = lineWidth,col=linecolor)
      points(x = 3.25, y = ypos, pch = ">", cex = 1.1*(lineWidth))
    }
    
  })
  
  abline(v = 1, lty = 2, col = "gray", xpd = FALSE)
  axis(side = 1, at = 0:3, labels = c(0:3), font = 1, pos = 0.5, cex.axis = 1.3)
  
  mtext(text = m.sigs$Hugo_Symbol, side = 4, line = 0.2, at = 1:nrow(m.sigs),
        font = 3, las= 2, cex = geneFontSize, adj = 0)
  
  # KEY CHANGE: Remove sample counts from title
  mtitle = paste(m1Name, ' Vs. ', m2Name, sep='')
  title(main = mtitle, font = 1, adj = 0, cex.main = titleSize)
  
  # Plot annotation columns
  par(mar = c(3, 0, 3, 0))
  plot(rep(0, nrow(m.sigs)), 1:nrow(m.sigs), xlim = c(0, 1), axes = FALSE,
       pch = NA, xlab = "", ylab = "", ylim = ylims)
  text(x = 0.5, y = 1:nrow(m.sigs), labels = as.numeric(unlist(m.sigs[,3])),
       adj = 0, font = 1, cex = 1.4*(geneFontSize))
  title(main = m2Name, cex.main = titleSize)
  
  par(mar = c(3, 0, 3, 0))
  plot(rep(0, nrow(m.sigs)), 1:nrow(m.sigs), xlim = c(0, 1), axes = FALSE,
       pch = NA, xlab = "", ylab = "", ylim = ylims)
  text(x = 0.5, y = 1:nrow(m.sigs), labels = as.numeric(unlist(m.sigs[,2])),
       adj = 0, font = 1, cex = 1.4*(geneFontSize))
  title(main = m1Name, cex.main = titleSize)
  
  par(mar = c(3, 0, 3, 0))
  plot(rep(0, nrow(m.sigs)), 1:nrow(m.sigs), xlim = c(0, 1), axes = FALSE,
       pch = NA, xlab = "", ylab = "", ylim = ylims)
  text(x = 0.5, y = 1:nrow(m.sigs), labels = round(m.sigs$or, digits = 3),
       adj = 0.5, font = 1, cex = 1.4*(geneFontSize))
  title(main = "OR", cex.main = titleSize)
  
  # MODIFIED CODE: Show actual p-values as decimal numbers instead of "NS"
  m.sigs$significance = ifelse(test = as.numeric(m.sigs$pval) < 0.001, yes = "***", no =
                                 ifelse(test = as.numeric(m.sigs$pval) < 0.01, yes = "**", no =
                                          ifelse(test = as.numeric(m.sigs$pval) < 0.05, yes = "*", no = 
                                                   format(round(as.numeric(m.sigs$pval), 3), nsmall=3))))
  
  par(mar = c(3, 0, 3, 0))
  plot(rep(0, nrow(m.sigs)), 1:nrow(m.sigs), xlim = c(0, 1), axes = FALSE,
       pch = NA, xlab = "", ylab = "", ylim = ylims)
  text(x = 0.5, y = 1:nrow(m.sigs), labels = m.sigs$significance,
       adj = 0, font = 1, cex = 1.4*(geneFontSize))
  title(main = "P-value", cex.main = titleSize)
  
  par(mar = c(0, 0, 0, 0))
  plot(NA, xlim = c(0,1), ylim = c(0, 1), axes = FALSE, xlab = NA, ylab = NA)
  
  text(x = 0, labels = paste0(
    "Odds ratio with 95% CI\n(1 = no effect, < 1 ",
    m2Name,
    " has more mutants)"
  ), y = 0.6, adj = 0, xpd = TRUE, cex = 1.2)
}
# Run mafCompare analysis
res = mafCompare(
  m1 = ALLBCLC_NoC,
  m2 = BCLC_C,
  m1Name = 'A+B',
  m2Name = 'C',
  minMut = 1,
  useCNV = FALSE,
  pathways = NULL,
  custom_pw = NULL,
  pseudoCount = TRUE
)

# Print the comparison results
#print(res)

# Use the modified forest plot function
forestPlot_noSamples(
  mafCompareRes = res,
  pVal = 0.5,
  lineWidth = 2,
  color = c('royalblue', 'maroon'),
  geneFontSize = 1
)

# Run mafCompare analysis
res = mafCompare(
  m1 = ALLsample_Viralsamples,
  m2 = ALLsample_NoViral,
  m1Name = 'Viral',
  m2Name = 'Non-Viral',
  minMut = 1,
  useCNV = FALSE,
  pathways = NULL,
  custom_pw = NULL,
  pseudoCount = TRUE
)

# Print the comparison results
#print(res)

# Use the modified forest plot function
forestPlot_noSamples(
  mafCompareRes = res,
  pVal = 0.5,
  lineWidth = 2,
  color = c('royalblue', 'maroon'),
  geneFontSize = 1
)